#sample validation------------------------------------------------------------

make_means_t1 <- function(df){
  df %>%
    mutate(age_under_20 = (AGE <= 20)*1,
           age_21_25 = (AGE >= 21 & AGE <= 25)*1,
           age_26_30 = (AGE >= 26 & AGE <= 30)*1,
           age_31_35 = (AGE >= 31 & AGE <= 35)*1,
           java_resident = (kab_code_residence > 3000 & kab_code_residence < 4000)*1) %>%
    group_by(t1) %>%
    summarise(age_under_20 = mean(age_under_20),
              age_21_25 = mean(age_21_25),
              age_26_30 = mean(age_26_30),
              age_31_35 = mean(age_31_35),
              #religion
              islam = mean(RELIGION == 1, na.rm = T),
              christian = mean(RELIGION == 2, na.rm = T),
              catholic = mean(RELIGION == 3, na.rm = T),
              other = mean(RELIGION %in% c(4, 5, 6, 7, 8), na.rm = T),
              
              #locaion
              java_resident = mean(java_resident, na.rm =T),
              
              female = mean(GENDER == 2, na.rm = T),
              got_job = mean(got_job)) %>%
    t() %>%
    data.frame() %>%
    rownames_to_column(., var = "var") %>%
    set_colnames(c("var", "val_pass", "val_fail")) %>%
    pivot_longer(cols = val_pass:val_fail, names_to = "outcome", values_to = "values") %>%
    return()
}

make_means_t3 <- function(df){
  
      df %>%
      mutate(age_under_20 = (AGE <= 20)*1,
             age_21_25 = (AGE >= 21 & AGE <= 25)*1,
             age_26_30 = (AGE >= 26 & AGE <= 30)*1,
             age_31_35 = (AGE >= 31 & AGE <= 35)*1,
             java_resident = (kab_code_residence > 3000 & kab_code_residence < 4000)*1) %>%
      group_by(t3) %>%
      summarise(age_under_20 = mean(age_under_20),
                age_21_25 = mean(age_21_25),
                age_26_30 = mean(age_26_30),
                age_31_35 = mean(age_31_35),
                #religion
                islam = mean(RELIGION == 1, na.rm = T),
                christian = mean(RELIGION == 2, na.rm = T),
                catholic = mean(RELIGION == 3, na.rm = T),
                other = mean(RELIGION %in% c(4, 5, 6, 7, 8), na.rm = T),
                
                #locaion
                java_resident = mean(java_resident, na.rm =T),
                
                female = mean(GENDER == 2, na.rm = T),
                got_job = mean(got_job)) %>%
      t() %>%
      data.frame() %>%
      rownames_to_column(., var = "var") %>%
      set_colnames(c("var", "val_pass", "val_fail")) %>%
      pivot_longer(cols = val_pass:val_fail, names_to = "outcome", values_to = "values") %>%
      return()
}


plot_data <-
  make_means_t3(analysis_df_subset %>% filter(abs(forcing_f3) < 5)) %>% mutate(type = "sample", treat = "Testing Effect Sample") %>% 
  bind_rows(., make_means_t1(analysis_df_subset %>% filter(abs(forcing_t1) < 1)) %>% mutate(type = "sample", treat = "Service Effect Sample")) %>%
  bind_rows(., make_means_t3(analysis_df  %>% filter(abs(forcing_f3) < 5)) %>% mutate(type = "universe", treat = "Testing Effect Sample")) %>%
  bind_rows(., make_means_t1(analysis_df  %>% filter(abs(forcing_t1) < 1)) %>% mutate(type = "universe", treat = "Service Effect Sample")) %>%
  filter(!(var %in% c("t3", "t1")))


label_df <-
  data.frame(label = c("Age: < 20", "Age: 21-25", "Age: 26-30", "Age: 31-35", 
               "Religion: Islam", "Religion: Protestant", "Religion: Catholic", "Religion: Other", 
               "Location: Java",
               "Gender: Female",
               "Disposition: No job"),
             var = c("age_under_20", "age_21_25", "age_26_30", "age_31_35", "islam", "christian", "catholic", "other", 
               "java_resident", "female", "got_job"))

sample_validation <-
  plot_data %>%
  left_join(., label_df) %>%
  mutate(outcome = case_when(outcome == "val_fail" ~ "Failed Exam",
                             outcome == "val_pass" ~ "Passed Exam",
                             TRUE ~ NA_character_)) %>%
  ggplot(aes(x=fct_rev(label), group = type, fill = type, y = values*100)) +
  geom_bar(stat = "identity", position = "dodge", color = "black", size = 0.4) +
  facet_grid(
             rows = vars(treat),
             cols = vars(outcome)) +
  geom_text(aes(y = values*100 + 6, 
                label = paste0(round(values*100, 0), '%')), 
            stat = 'identity', 
            family = "lmroman",
            position = position_dodge(.75), 
            size = 3) +
  scale_fill_manual(values = c("white", "darkgrey")) +
  #scale_fill_brewer(palette = 10) +
  theme_bw() +
  coord_flip() +
  xlab("") +
  theme(text=element_text(size=10, family="lmroman"),
        panel.grid.minor = element_blank(), 
        #panel.grid.major.x = element_blank(), 
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.title = element_blank(),
        axis.title.x = element_blank(),
        panel.border = element_blank(),
        axis.line.x = element_line(),
        axis.ticks.y = element_blank(),
        strip.background = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA)
  ) 


ggsave(plot = sample_validation, "./BKN survey/apsr_kuipers_replication_file/_4_outputs/figures/3_figure_a4.png", width = 9, height = 5, bg = "transparent")

